#!/usr/bin/env python
#coding=utf-8

"""must manaul install eventlet package"""

import eventlet,re
from eventlet.green import urllib2

# Seed URLs passed to async_fetch when this module is run as a script.
url_list = [
	"http://www.l99.com/",
]

def fetch(url):
	"""Download ``url`` with the green urllib2 and return its body.

	Args:
		url: Address to open with ``urllib2.urlopen``.

	Returns:
		A ``(url, buf)`` tuple where ``buf`` is everything read from the
		response. ``async_fetch`` unpacks each result as ``url, buf``, so
		a pair must be returned (the previous version printed the body
		and returned ``None``, which broke that unpacking).

	Raises:
		Whatever ``urllib2.urlopen`` / ``read`` raise on failure; errors
		are not caught here.
	"""
	response = urllib2.urlopen(url)
	try:
		buf = response.read()
	finally:
		# Always release the connection, even if read() fails.
		response.close()
	return url, buf

def async_fetch(url_list):
	"""Fetch every URL concurrently and collect the links found in them.

	Each URL is handed to ``fetch`` through an eventlet ``GreenPool`` of
	size 5. Every ``<a href="http...">`` target found in the returned
	bodies is gathered into a set, so duplicates are dropped.

	Args:
		url_list: Iterable of URLs to download.

	Returns:
		A list of the distinct link targets found (order is arbitrary).
		The same links are also printed, one per line.
	"""
	# Green thread pool with a size of 5.
	pool = eventlet.GreenPool(5)

	set_url = set()
	# fetch is expected to yield (url, body) pairs; only the body is used.
	for _url, buf in pool.imap(fetch, url_list):
		# [^"]* stops at the closing quote of the href; the former greedy
		# ".*" ran on to the last quote on the line.
		list_url = re.findall(r'<a href="(http[^"]*)"', buf)
		# Merge this page's links (previously the set was OR-ed with
		# itself, so nothing was ever added).
		set_url.update(list_url)

	print('\n'.join(set_url))
	return list(set_url)

def async_fetch_file(ifn='url.test'):
	"""Read URLs from a file, crawl them, and overwrite the file with the links found.

	Args:
		ifn: Path of a text file holding one URL per line. Blank lines
			are ignored. The file is rewritten with the links returned
			by ``async_fetch``, one per line.
	"""
	with open(ifn) as src:
		# Strip whitespace and drop empty lines.
		list_url = [line.strip() for line in src if line.strip()]

	more_url = async_fetch(list_url)

	# Build the output before opening for writing: opening truncates the
	# file, so a failure here must not wipe the existing URL list.
	content = '\n'.join(more_url)
	with open(ifn, 'w+') as dst:
		dst.write(content)

	# Report the count, as the message says, rather than the whole list.
	print('get url length %d' % len(more_url))

if "__main__" == __name__:
	# Script entry point: crawl the seed URLs defined in url_list.
	# Alternatives kept for manual experiments:
	#   fetch(url_list[0])
	#   async_fetch_file()
	async_fetch(url_list)